# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)

# === First, compute cumulative flow and return paths for top 1 - bottom 1, and top 3 - bottom 3 paths
# NOTE(review): rm(list = ls()) wipes the global workspace of whoever runs this
# script. It is kept to preserve existing behavior; running the script in a
# fresh R session would make it unnecessary.
rm(list = ls())

# In each month, sort styles by the exponential sum of ratings (expSum_1) and
# assign bin = 1 (smallest expSum_1) to 9 (largest expSum_1).
sortData = readRDS('input_data/morningstar_style_data.RDS')
sortData = sortData[, list(yyyymm, category, expSum_1)]
sortData = sortData[order(yyyymm, expSum_1)]
# One grouped assignment replaces the per-month loop, which rescanned the whole
# table for every month. The literal 1:9 is deliberate: it encodes the
# assumption of exactly nine styles per month, and a month with a different
# count is rejected by data.table rather than silently recycled.
sortData[, bin := 1:9, by = yyyymm]

# Read data: style portfolios split into three groups (bin_held) based on the
# holding intensity of mutual funds.
data = readRDS('input_data/morningstar_style_data_by_terciles.RDS')
data = data[, list(yyyymm, category, bin_held, ret)]

# Map every month to a consecutive integer index. The months are sorted before
# indexing so that idx is chronological even when the input rows are not
# ordered by date (unique() alone keeps the order of first appearance).
# NOTE(review): horizons below are counted in index steps, which equal calendar
# months only if the sample has no missing months -- confirm against the data.
monthIdx = unique(data[, list(yyyymm)])[order(yyyymm)]
monthIdx[, idx := seq_len(.N)]
data = merge(data, monthIdx, by = 'yyyymm')

# Get the subsequent 36 months of returns: a return observed at index t is
# attached to formation index t - h with horizon h, for h = 0, ..., 36.
# The pieces are built in a list and bound once instead of growing a table
# with rbind() inside a loop.
out = rbindlist(lapply(0:36, function(h) {
  data[, list(idx = idx - h, hor = h, category, bin_held, ret)]
}))

# Translate the formation index back to its month; this is an inner join, so
# formation indices that fall outside the sample are dropped.
out = merge(out, monthIdx, by = 'idx')

# Attach the rating bin of each style in the formation month, then keep only
# rows without any missing value.
out = merge(out, sortData[, list(yyyymm, category, bin)],
            by = c('yyyymm', 'category'), all.x = TRUE)
out = out[complete.cases(out)]
out[, idx := NULL]
data = out
rm(out, monthIdx, sortData); gc()

# Label each observation by sample period (cutoff: yyyymm value 200206)
data[, period := ifelse(yyyymm <= 200206, '1_before 2002', '2_after 2002')]

# Average return within every (rating bin, horizon, holding group, period) cell
cellMeans = data[, .(ret = mean(ret)), by = .(bin, hor, bin_held, period)]

# Long leg is the top bin (9), short leg is the bottom bin (1)
joinCols = c('hor', 'bin_held', 'period')
longLeg = cellMeans[bin == 9, .(hor, bin_held, period, l_ret = ret)]
shortLeg = cellMeans[bin == 1, .(hor, bin_held, period, s_ret = ret)]
spread = merge(longLeg, shortLeg, by = joinCols)

# Top-minus-bottom return per horizon, holding group, and period
data = spread[, .(hor, bin_held, period, ret = l_ret - s_ret)]
rm(cellMeans, joinCols, longLeg, shortLeg, spread)

# Compute cumulative returns: running sum of the top-minus-bottom return across
# horizons, separately for each period and holding group.
data = data[order(hor)]
# A grouped cumsum replaces the nested loops over periods and the hard-coded
# holding groups 1:3, so every bin_held value present in the data is handled.
# Rows are already ordered by horizon, so the sum accumulates from hor = 0 up.
data[, cumret := cumsum(ret), by = list(period, bin_held)]

# Attach the bootstrapped standard errors, matched on horizon, period, and
# holding group
seData = readRDS('input_data/style_price_pressure_bootstraps_terciles.RDS')
data = merge(data, seData, by = c('hor','period','bin_held'))
rm(seData)

# Legend label for each holding group (3 = highest, 2 = mid, otherwise lowest)
data[, lab := ifelse(
  bin_held == 3,
  'Highest mutual fund holding',
  ifelse(bin_held == 2, 'Mid mutual fund holding', 'Lowest mutual fund holding')
)]

# Keep a full copy, because `data` is overwritten with per-period subsets when
# plotting
data_bk = copy(data)

# Draw the cumulative top-minus-bottom return path for each holding group, with
# +/- 1.96 * se bands around the lowest (1) and highest (3) holding groups.
# Both period plots share this specification and differ only in the data subset
# and the legend position.
#   plotData:       data.table with columns hor, cumret, se, bin_held, lab
#   legendPosition: c(x, y) passed to theme(legend.position = ...)
# Returns the ggplot object (auto-printed when called at top level).
plotCumretByHolding = function(plotData, legendPosition) {
  ggplot(plotData, aes(x = hor, y = cumret, fill = reorder(lab, -bin_held))) +
    geom_line(aes(color = reorder(lab, -bin_held)), lwd = 1) +
    geom_ribbon(data = plotData[bin_held %in% c(1,3)],
                aes(ymin = cumret - 1.96*se, ymax = cumret + 1.96*se), alpha = .2) +
    theme_classic() +
    labs(x = 'Months', y = 'Cumulative return: top minus bottom style') +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
    theme(legend.title = element_blank(), legend.position = legendPosition) +
    geom_hline(yintercept = 0, lty = 3) +
    # same y-range in both plots so the two periods are visually comparable
    coord_cartesian(ylim = c(-.08, .15))
}

# plot: before 2002
data = copy(data_bk[period == '1_before 2002'])
plotCumretByHolding(data, legendPosition = c(.25, .2))

# plot: after 2002
data = copy(data_bk[period == '2_after 2002'])
plotCumretByHolding(data, legendPosition = c(.8, .8))
